In [1]:
import pandas as pd
linkData="https://github.com/SocialAnalytics-StrategicIntelligence/TableOperations/raw/main/dengue_ok.pkl"

dengue = pd.read_pickle(linkData)

# checking format
dengue.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 501236 entries, 0 to 501235
Data columns (total 9 columns):
 #   Column        Non-Null Count   Dtype   
---  ------        --------------   -----   
 0   departamento  501236 non-null  object  
 1   provincia     501236 non-null  object  
 2   distrito      501236 non-null  object  
 3   ano           501236 non-null  int64   
 4   semana        501236 non-null  int64   
 5   sexo          501236 non-null  object  
 6   edad          501236 non-null  int64   
 7   enfermedad    501236 non-null  category
 8   case          501236 non-null  int64   
dtypes: category(1), int64(4), object(4)
memory usage: 31.1+ MB
In [2]:
# some exploration
dengue.describe().apply(lambda s: s.apply('{0:.5f}'.format))
Out[2]:
ano semana edad case
count 501236.00000 501236.00000 501236.00000 501236.00000
mean 2014.77213 21.99838 28.96143 1.00000
std 6.14646 14.76658 18.15954 0.00000
min 2000.00000 1.00000 0.00000 1.00000
25% 2011.00000 11.00000 15.00000 1.00000
50% 2016.00000 18.00000 26.00000 1.00000
75% 2020.00000 32.00000 41.00000 1.00000
max 2022.00000 53.00000 106.00000 1.00000
In [3]:
# exploring
dengue.enfermedad.value_counts()
Out[3]:
enfermedad
SIN_SEÑALES    443996
ALARMA          54981
GRAVE            2259
Name: count, dtype: int64
In [4]:
dengue['enfermedad_text']=dengue.enfermedad.astype(str)

dengue.replace({'enfermedad_text':{'SIN_SEÑALES':'1_SIN_SEÑALES','ALARMA':'2_ALARMA','GRAVE':'3_GRAVE'}},inplace=True)
In [5]:
# exploring
dengue.ano.value_counts(sort=False)
Out[5]:
ano
2000     5557
2001    23526
2002     8086
2003     3349
2004     9547
2005     5640
2006     4022
2007     6344
2008    12824
2009    13407
2010    16842
2011    28084
2012    28505
2013    13092
2015    35816
2014    17234
2016    25160
2017    68279
2018     4698
2019    15287
2020    47932
2021    44791
2022    63214
Name: count, dtype: int64
In [6]:
binLimits=[0,15,50,110]
theLabels=["a_menor_a_16","b_entre_16y50","c_mayor_a_50"]
dengue["edad_grupos"]=pd.cut(dengue['edad'], include_lowest=True,
                                     bins=binLimits,
                                     labels=theLabels,
                                     ordered=True)

# see

dengue.head()
Out[6]:
departamento provincia distrito ano semana sexo edad enfermedad case enfermedad_text edad_grupos
0 HUANUCO LEONCIO PRADO LUYANDO 2000 47 M 9 SIN_SEÑALES 1 1_SIN_SEÑALES a_menor_a_16
1 HUANUCO LEONCIO PRADO LUYANDO 2000 40 F 18 SIN_SEÑALES 1 1_SIN_SEÑALES b_entre_16y50
2 HUANUCO LEONCIO PRADO JOSE CRESPO Y CASTILLO 2000 48 F 32 SIN_SEÑALES 1 1_SIN_SEÑALES b_entre_16y50
3 HUANUCO LEONCIO PRADO JOSE CRESPO Y CASTILLO 2000 37 F 40 SIN_SEÑALES 1 1_SIN_SEÑALES b_entre_16y50
4 HUANUCO LEONCIO PRADO MARIANO DAMASO BERAUN 2000 42 M 16 SIN_SEÑALES 1 1_SIN_SEÑALES b_entre_16y50
In [7]:
pd.crosstab( dengue.enfermedad_text,dengue.edad_grupos, dropna=False, normalize='columns')
Out[7]:
edad_grupos a_menor_a_16 b_entre_16y50 c_mayor_a_50
enfermedad_text
1_SIN_SEÑALES 0.876868 0.890247 0.884549
2_ALARMA 0.119010 0.105475 0.109204
3_GRAVE 0.004122 0.004278 0.006248
In [8]:
pd.crosstab(dengue.enfermedad_text,[dengue.sexo,dengue.edad_grupos], dropna=False, normalize='columns')
Out[8]:
sexo F M
edad_grupos a_menor_a_16 b_entre_16y50 c_mayor_a_50 a_menor_a_16 b_entre_16y50 c_mayor_a_50
enfermedad_text
1_SIN_SEÑALES 0.875221 0.884646 0.881328 0.878431 0.897139 0.888295
2_ALARMA 0.120614 0.110622 0.113616 0.117488 0.099142 0.104073
3_GRAVE 0.004165 0.004731 0.005057 0.004081 0.003720 0.007633

Weekly

In [9]:
!pip install altair -U
!pip install "vegafusion-jupyter[embed]"
Requirement already satisfied: altair in /usr/local/lib/python3.10/dist-packages (5.4.1)
Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from altair) (3.1.4)
Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.10/dist-packages (from altair) (4.19.2)
Requirement already satisfied: narwhals>=1.5.2 in /usr/local/lib/python3.10/dist-packages (from altair) (1.6.0)
Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from altair) (24.1)
Requirement already satisfied: typing-extensions>=4.10.0 in /usr/local/lib/python3.10/dist-packages (from altair) (4.12.2)
Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair) (23.2.0)
Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair) (2023.12.1)
Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair) (0.35.1)
Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair) (0.19.0)
Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->altair) (2.1.5)
Requirement already satisfied: vegafusion-jupyter[embed] in /usr/local/lib/python3.10/dist-packages (1.6.9)
Requirement already satisfied: ipywidgets<9,>=7.0.0 in /usr/local/lib/python3.10/dist-packages (from vegafusion-jupyter[embed]) (7.7.1)
Requirement already satisfied: altair>=4.2.0 in /usr/local/lib/python3.10/dist-packages (from vegafusion-jupyter[embed]) (5.4.1)
Requirement already satisfied: vegafusion==1.6.9 in /usr/local/lib/python3.10/dist-packages (from vegafusion-jupyter[embed]) (1.6.9)
Requirement already satisfied: vegafusion-python-embed==1.6.9 in /usr/local/lib/python3.10/dist-packages (from vegafusion-jupyter[embed]) (1.6.9)
Requirement already satisfied: vl-convert-python>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from vegafusion-jupyter[embed]) (1.6.1)
Requirement already satisfied: pyarrow>=5 in /usr/local/lib/python3.10/dist-packages (from vegafusion==1.6.9->vegafusion-jupyter[embed]) (14.0.2)
Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from vegafusion==1.6.9->vegafusion-jupyter[embed]) (2.0.3)
Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from vegafusion==1.6.9->vegafusion-jupyter[embed]) (5.9.5)
Requirement already satisfied: protobuf in /usr/local/lib/python3.10/dist-packages (from vegafusion==1.6.9->vegafusion-jupyter[embed]) (3.20.3)
Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from altair>=4.2.0->vegafusion-jupyter[embed]) (3.1.4)
Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.10/dist-packages (from altair>=4.2.0->vegafusion-jupyter[embed]) (4.19.2)
Requirement already satisfied: narwhals>=1.5.2 in /usr/local/lib/python3.10/dist-packages (from altair>=4.2.0->vegafusion-jupyter[embed]) (1.6.0)
Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from altair>=4.2.0->vegafusion-jupyter[embed]) (24.1)
Requirement already satisfied: typing-extensions>=4.10.0 in /usr/local/lib/python3.10/dist-packages (from altair>=4.2.0->vegafusion-jupyter[embed]) (4.12.2)
Requirement already satisfied: ipykernel>=4.5.1 in /usr/local/lib/python3.10/dist-packages (from ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (5.5.6)
Requirement already satisfied: ipython-genutils~=0.2.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (0.2.0)
Requirement already satisfied: traitlets>=4.3.1 in /usr/local/lib/python3.10/dist-packages (from ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (5.7.1)
Requirement already satisfied: widgetsnbextension~=3.6.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (3.6.7)
Requirement already satisfied: ipython>=4.0.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (7.34.0)
Requirement already satisfied: jupyterlab-widgets>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (3.0.11)
Requirement already satisfied: jupyter-client in /usr/local/lib/python3.10/dist-packages (from ipykernel>=4.5.1->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (6.1.12)
Requirement already satisfied: tornado>=4.2 in /usr/local/lib/python3.10/dist-packages (from ipykernel>=4.5.1->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (6.3.3)
Requirement already satisfied: setuptools>=18.5 in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (71.0.4)
Requirement already satisfied: jedi>=0.16 in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (0.19.1)
Requirement already satisfied: decorator in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (4.4.2)
Requirement already satisfied: pickleshare in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (0.7.5)
Requirement already satisfied: prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (3.0.47)
Requirement already satisfied: pygments in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (2.16.1)
Requirement already satisfied: backcall in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (0.2.0)
Requirement already satisfied: matplotlib-inline in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (0.1.7)
Requirement already satisfied: pexpect>4.3 in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (4.9.0)
Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair>=4.2.0->vegafusion-jupyter[embed]) (23.2.0)
Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair>=4.2.0->vegafusion-jupyter[embed]) (2023.12.1)
Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair>=4.2.0->vegafusion-jupyter[embed]) (0.35.1)
Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair>=4.2.0->vegafusion-jupyter[embed]) (0.19.0)
Requirement already satisfied: numpy>=1.16.6 in /usr/local/lib/python3.10/dist-packages (from pyarrow>=5->vegafusion==1.6.9->vegafusion-jupyter[embed]) (1.25.2)
Requirement already satisfied: notebook>=4.4.1 in /usr/local/lib/python3.10/dist-packages (from widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (6.5.5)
Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->altair>=4.2.0->vegafusion-jupyter[embed]) (2.1.5)
Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas->vegafusion==1.6.9->vegafusion-jupyter[embed]) (2.8.2)
Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->vegafusion==1.6.9->vegafusion-jupyter[embed]) (2023.4)
Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas->vegafusion==1.6.9->vegafusion-jupyter[embed]) (2024.1)
Requirement already satisfied: parso<0.9.0,>=0.8.3 in /usr/local/lib/python3.10/dist-packages (from jedi>=0.16->ipython>=4.0.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (0.8.4)
Requirement already satisfied: pyzmq<25,>=17 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (24.0.1)
Requirement already satisfied: argon2-cffi in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (23.1.0)
Requirement already satisfied: jupyter-core>=4.6.1 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (5.7.2)
Requirement already satisfied: nbformat in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (5.10.4)
Requirement already satisfied: nbconvert>=5 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (6.5.4)
Requirement already satisfied: nest-asyncio>=1.5 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (1.6.0)
Requirement already satisfied: Send2Trash>=1.8.0 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (1.8.3)
Requirement already satisfied: terminado>=0.8.3 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (0.18.1)
Requirement already satisfied: prometheus-client in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (0.20.0)
Requirement already satisfied: nbclassic>=0.4.7 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (1.1.0)
Requirement already satisfied: ptyprocess>=0.5 in /usr/local/lib/python3.10/dist-packages (from pexpect>4.3->ipython>=4.0.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (0.7.0)
Requirement already satisfied: wcwidth in /usr/local/lib/python3.10/dist-packages (from prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0->ipython>=4.0.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (0.2.13)
Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas->vegafusion==1.6.9->vegafusion-jupyter[embed]) (1.16.0)
Requirement already satisfied: platformdirs>=2.5 in /usr/local/lib/python3.10/dist-packages (from jupyter-core>=4.6.1->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (4.2.2)
Requirement already satisfied: notebook-shim>=0.2.3 in /usr/local/lib/python3.10/dist-packages (from nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (0.2.4)
Requirement already satisfied: lxml in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (4.9.4)
Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (4.12.3)
Requirement already satisfied: bleach in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (6.1.0)
Requirement already satisfied: defusedxml in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (0.7.1)
Requirement already satisfied: entrypoints>=0.2.2 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (0.4)
Requirement already satisfied: jupyterlab-pygments in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (0.3.0)
Requirement already satisfied: mistune<2,>=0.8.1 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (0.8.4)
Requirement already satisfied: nbclient>=0.5.0 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (0.10.0)
Requirement already satisfied: pandocfilters>=1.4.1 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (1.5.1)
Requirement already satisfied: tinycss2 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (1.3.0)
Requirement already satisfied: fastjsonschema>=2.15 in /usr/local/lib/python3.10/dist-packages (from nbformat->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (2.20.0)
Requirement already satisfied: argon2-cffi-bindings in /usr/local/lib/python3.10/dist-packages (from argon2-cffi->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (21.2.0)
Requirement already satisfied: jupyter-server<3,>=1.8 in /usr/local/lib/python3.10/dist-packages (from notebook-shim>=0.2.3->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (1.24.0)
Requirement already satisfied: cffi>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from argon2-cffi-bindings->argon2-cffi->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (1.16.0)
Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.10/dist-packages (from beautifulsoup4->nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (2.5)
Requirement already satisfied: webencodings in /usr/local/lib/python3.10/dist-packages (from bleach->nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (0.5.1)
Requirement already satisfied: pycparser in /usr/local/lib/python3.10/dist-packages (from cffi>=1.0.1->argon2-cffi-bindings->argon2-cffi->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (2.22)
Requirement already satisfied: anyio<4,>=3.1.0 in /usr/local/lib/python3.10/dist-packages (from jupyter-server<3,>=1.8->notebook-shim>=0.2.3->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (3.7.1)
Requirement already satisfied: websocket-client in /usr/local/lib/python3.10/dist-packages (from jupyter-server<3,>=1.8->notebook-shim>=0.2.3->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (1.8.0)
Requirement already satisfied: idna>=2.8 in /usr/local/lib/python3.10/dist-packages (from anyio<4,>=3.1.0->jupyter-server<3,>=1.8->notebook-shim>=0.2.3->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (3.7)
Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.10/dist-packages (from anyio<4,>=3.1.0->jupyter-server<3,>=1.8->notebook-shim>=0.2.3->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (1.3.1)
Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<4,>=3.1.0->jupyter-server<3,>=1.8->notebook-shim>=0.2.3->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (1.2.2)
In [10]:
import altair as alt
alt.data_transformers.enable("vegafusion")
Out[10]:
DataTransformerRegistry.enable('vegafusion')
In [11]:
dengue = dengue[dengue['ano'] == 2017]
In [12]:
alt_dengue=alt.Chart(dengue)

enc_dengue=alt_dengue.encode(
    x='semana:O',
    y='mean(edad):Q',
    color='enfermedad_text:N',
)

enc_dengue.mark_line() + enc_dengue.mark_errorband()
Out[12]:
In [13]:
enc_dengue=alt_dengue.encode(
    x='semana:O',
    y='median(edad):Q',
    color='enfermedad_text:N',
    tooltip=['median(edad)','ano:T']
).interactive()

enc_dengue.mark_line().facet(
    row='sexo:N',
    column='edad_grupos:N'
)
Out[13]:
In [14]:
enc_dengue=alt_dengue.encode(
    x='semana:O',
    y='median(edad):Q',
    color='enfermedad_text:N',
    tooltip=['median(edad)','semana:O']
).interactive()

enc_dengue.mark_line().facet(
    row='sexo:N',
    column='edad_grupos:N'
)
Out[14]:
In [15]:
enc_dengue=alt_dengue.encode(
    x='semana:O',
    y=alt.Y('sum(case):Q'),
    color='enfermedad_text:N',
    tooltip=['sum(case):Q','semana:O']
).interactive()
enc_dengue.mark_line().facet(
    row='sexo:N',
    column='edad_grupos:N'
)
Out[15]:
In [16]:
enc_dengue=alt_dengue.encode(
    x='semana:O',
    y=alt.Y('sum(case):Q', scale=alt.Scale(type='log')),
    color='enfermedad_text:N',
    tooltip=['sum(case):Q','semana:O']
).interactive()

enc_dengue.mark_line().facet(
    row='sexo:N',
    column='edad_grupos:N'
)
Out[16]:
In [17]:
indexList=['edad_grupos','semana','sexo','enfermedad_text']
aggregator={'edad': ['median']}
LevelByWeek_medians=dengue.groupby(indexList,observed=True).agg(aggregator)
LevelByWeek_medians
Out[17]:
edad
median
edad_grupos semana sexo enfermedad_text
a_menor_a_16 1 F 1_SIN_SEÑALES 9.0
2_ALARMA 6.0
M 1_SIN_SEÑALES 11.0
2_ALARMA 12.5
3_GRAVE 11.0
... ... ... ... ...
c_mayor_a_50 52 F 1_SIN_SEÑALES 68.0
2_ALARMA 63.0
M 1_SIN_SEÑALES 59.5
2_ALARMA 62.0
3_GRAVE 69.0

726 rows × 1 columns

In [18]:
LevelByWeek_medians.unstack(['sexo','enfermedad_text'])
Out[18]:
edad
median
sexo F M
enfermedad_text 1_SIN_SEÑALES 2_ALARMA 3_GRAVE 1_SIN_SEÑALES 2_ALARMA 3_GRAVE
edad_grupos semana
a_menor_a_16 1 9.0 6.0 NaN 11.0 12.5 11.0
2 13.0 12.5 NaN 10.0 12.0 5.0
3 9.0 11.0 NaN 9.0 11.0 4.0
4 10.0 12.0 NaN 11.0 4.0 NaN
5 9.0 7.0 NaN 11.0 12.0 NaN
... ... ... ... ... ... ... ...
c_mayor_a_50 48 63.0 57.0 61.0 56.0 64.0 NaN
49 65.0 65.0 NaN 64.0 NaN 81.0
50 63.0 NaN NaN 57.0 56.0 NaN
51 58.0 75.0 NaN 59.0 73.0 NaN
52 68.0 63.0 NaN 59.5 62.0 69.0

156 rows × 6 columns

In [19]:
alt_dengue=alt.Chart(dengue)
enc_dengue=alt_dengue.encode(
    x='semana:O',
    y=alt.Y('sum(case):Q', scale=alt.Scale(type='log')),
    column='enfermedad_text:N',
)
enc_dengue.mark_bar()
Out[19]:
In [20]:
indexList=['edad_grupos','semana','sexo','enfermedad_text']
aggregator={'edad': ['median','mean','min','max']}
LevelByWeek_statsFull=dengue.groupby(indexList,observed=True).agg(aggregator)
LevelByWeek_statsFull
Out[20]:
edad
median mean min max
edad_grupos semana sexo enfermedad_text
a_menor_a_16 1 F 1_SIN_SEÑALES 9.0 8.750000 1 15
2_ALARMA 6.0 7.111111 2 14
M 1_SIN_SEÑALES 11.0 9.684211 1 15
2_ALARMA 12.5 10.125000 2 14
3_GRAVE 11.0 11.000000 11 11
... ... ... ... ... ... ... ...
c_mayor_a_50 52 F 1_SIN_SEÑALES 68.0 66.375000 51 78
2_ALARMA 63.0 62.666667 54 71
M 1_SIN_SEÑALES 59.5 63.666667 51 87
2_ALARMA 62.0 62.333333 57 68
3_GRAVE 69.0 69.000000 69 69

726 rows × 4 columns

Mining location Let's use departamento and provincia:

In [21]:
indexList=['semana','departamento','provincia','enfermedad_text']
aggregator={'case':['sum']}
ByWeekPlace=dengue.groupby(indexList,observed=True).agg(aggregator)
ByWeekPlace
Out[21]:
case
sum
semana departamento provincia enfermedad_text
1 AYACUCHO HUANTA 1_SIN_SEÑALES 9
2_ALARMA 2
LA MAR 1_SIN_SEÑALES 5
2_ALARMA 2
CAJAMARCA CONTUMAZA 1_SIN_SEÑALES 16
... ... ... ... ...
52 PIURA MORROPON 1_SIN_SEÑALES 1
PIURA 1_SIN_SEÑALES 13
SULLANA 1_SIN_SEÑALES 9
UCAYALI CORONEL PORTILLO 1_SIN_SEÑALES 23
2_ALARMA 3

2626 rows × 1 columns

In [22]:
#long to wide
ByWeekPlace.unstack()
Out[22]:
case
sum
enfermedad_text 1_SIN_SEÑALES 2_ALARMA 3_GRAVE
semana departamento provincia
1 AYACUCHO HUANTA 9.0 2.0 NaN
LA MAR 5.0 2.0 NaN
CAJAMARCA CONTUMAZA 16.0 NaN NaN
CUSCO LA CONVENCION 18.0 3.0 NaN
HUANUCO LEONCIO PRADO 1.0 NaN NaN
... ... ... ... ... ...
52 MADRE DE DIOS TAMBOPATA 33.0 21.0 2.0
PIURA MORROPON 1.0 NaN NaN
PIURA 13.0 NaN NaN
SULLANA 9.0 NaN NaN
UCAYALI CORONEL PORTILLO 23.0 3.0 NaN

1795 rows × 3 columns

In [23]:
ByWeekPlace_wide=ByWeekPlace.unstack().fillna(0)
ByWeekPlace_wide
Out[23]:
case
sum
enfermedad_text 1_SIN_SEÑALES 2_ALARMA 3_GRAVE
semana departamento provincia
1 AYACUCHO HUANTA 9.0 2.0 0.0
LA MAR 5.0 2.0 0.0
CAJAMARCA CONTUMAZA 16.0 0.0 0.0
CUSCO LA CONVENCION 18.0 3.0 0.0
HUANUCO LEONCIO PRADO 1.0 0.0 0.0
... ... ... ... ... ...
52 MADRE DE DIOS TAMBOPATA 33.0 21.0 2.0
PIURA MORROPON 1.0 0.0 0.0
PIURA 13.0 0.0 0.0
SULLANA 9.0 0.0 0.0
UCAYALI CORONEL PORTILLO 23.0 3.0 0.0

1795 rows × 3 columns

In [24]:
sumCases=ByWeekPlace_wide.sum(axis=1)
sumCases
Out[24]:
semana  departamento   provincia       
1       AYACUCHO       HUANTA              11.0
                       LA MAR               7.0
        CAJAMARCA      CONTUMAZA           16.0
        CUSCO          LA CONVENCION       21.0
        HUANUCO        LEONCIO PRADO        1.0
                                           ... 
52      MADRE DE DIOS  TAMBOPATA           56.0
        PIURA          MORROPON             1.0
                       PIURA               13.0
                       SULLANA              9.0
        UCAYALI        CORONEL PORTILLO    26.0
Length: 1795, dtype: float64
In [25]:
shareAlarma=ByWeekPlace_wide.loc[:,('case','sum','2_ALARMA')]/sumCases
shareAlarma.name='shareAlarma'
shareAlarma
Out[25]:
semana  departamento   provincia       
1       AYACUCHO       HUANTA              0.181818
                       LA MAR              0.285714
        CAJAMARCA      CONTUMAZA           0.000000
        CUSCO          LA CONVENCION       0.142857
        HUANUCO        LEONCIO PRADO       0.000000
                                             ...   
52      MADRE DE DIOS  TAMBOPATA           0.375000
        PIURA          MORROPON            0.000000
                       PIURA               0.000000
                       SULLANA             0.000000
        UCAYALI        CORONEL PORTILLO    0.115385
Name: shareAlarma, Length: 1795, dtype: float64
In [26]:
shareAlarma=shareAlarma.reset_index()
shareAlarma
Out[26]:
semana departamento provincia shareAlarma
0 1 AYACUCHO HUANTA 0.181818
1 1 AYACUCHO LA MAR 0.285714
2 1 CAJAMARCA CONTUMAZA 0.000000
3 1 CUSCO LA CONVENCION 0.142857
4 1 HUANUCO LEONCIO PRADO 0.000000
... ... ... ... ...
1790 52 MADRE DE DIOS TAMBOPATA 0.375000
1791 52 PIURA MORROPON 0.000000
1792 52 PIURA PIURA 0.000000
1793 52 PIURA SULLANA 0.000000
1794 52 UCAYALI CORONEL PORTILLO 0.115385

1795 rows × 4 columns

In [27]:
where = shareAlarma.groupby(['semana','departamento'])['shareAlarma'].idxmax()
worst_prov_week = shareAlarma.loc[where].reset_index(drop=True)
worst_prov_week
Out[27]:
semana departamento provincia shareAlarma
0 1 AYACUCHO LA MAR 0.285714
1 1 CAJAMARCA CONTUMAZA 0.000000
2 1 CUSCO LA CONVENCION 0.142857
3 1 HUANUCO LEONCIO PRADO 0.000000
4 1 ICA PALPA 0.000000
... ... ... ... ...
752 52 LIMA LIMA 1.000000
753 52 LORETO REQUENA 1.000000
754 52 MADRE DE DIOS TAMBOPATA 0.375000
755 52 PIURA MORROPON 0.000000
756 52 UCAYALI CORONEL PORTILLO 0.115385

757 rows × 4 columns

In [28]:
worst_prov_week.shareAlarma.describe()
Out[28]:
count    757.000000
mean       0.240214
std        0.330670
min        0.000000
25%        0.000000
50%        0.062500
75%        0.375000
max        1.000000
Name: shareAlarma, dtype: float64
In [29]:
worst_ProvWeek_alarma=worst_prov_week[worst_prov_week.shareAlarma>0].loc[:,['departamento','provincia']]
worst_ProvWeek_alarma.reset_index(drop=True,inplace=True)
worst_ProvWeek_alarma
Out[29]:
departamento provincia
0 AYACUCHO LA MAR
1 CUSCO LA CONVENCION
2 LA LIBERTAD CHEPEN
3 LORETO MAYNAS
4 MADRE DE DIOS TAMBOPATA
... ... ...
449 LA LIBERTAD TRUJILLO
450 LIMA LIMA
451 LORETO REQUENA
452 MADRE DE DIOS TAMBOPATA
453 UCAYALI CORONEL PORTILLO

454 rows × 2 columns

In [30]:
indexList=['departamento','provincia']
aggregator={'provincia':['count']}
worst_ProvWeek_alarma_Frequency=worst_ProvWeek_alarma.groupby(indexList,observed=True).agg(aggregator)
worst_ProvWeek_alarma_Frequency
Out[30]:
provincia
count
departamento provincia
AMAZONAS UTCUBAMBA 2
ANCASH CASMA 1
SANTA 15
AYACUCHO HUAMANGA 1
HUANTA 17
... ... ...
TUMBES TUMBES 12
ZARUMILLA 5
UCAYALI ATALAYA 18
CORONEL PORTILLO 17
PADRE ABAD 6

62 rows × 1 columns

In [31]:
# final look
worst_ProvWeek_alarma_Frequency.columns=['weeksAffected']
worst_ProvWeek_alarma_Frequency=worst_ProvWeek_alarma_Frequency[worst_ProvWeek_alarma_Frequency.weeksAffected>2]
worst_ProvWeek_alarma_Frequency.reset_index(inplace=True)
worst_ProvWeek_alarma_Frequency
Out[31]:
departamento provincia weeksAffected
0 ANCASH SANTA 15
1 AYACUCHO HUANTA 17
2 AYACUCHO LA MAR 21
3 CUSCO LA CONVENCION 27
4 HUANUCO LEONCIO PRADO 12
5 ICA ICA 7
6 ICA NAZCA 3
7 ICA PALPA 15
8 JUNIN CHANCHAMAYO 12
9 JUNIN SATIPO 6
10 LA LIBERTAD ASCOPE 4
11 LA LIBERTAD CHEPEN 11
12 LA LIBERTAD PACASMAYO 6
13 LA LIBERTAD TRUJILLO 13
14 LA LIBERTAD VIRU 3
15 LAMBAYEQUE CHICLAYO 16
16 LAMBAYEQUE LAMBAYEQUE 4
17 LIMA LIMA 11
18 LORETO DATEM DEL MARAÑON 4
19 LORETO LORETO 5
20 LORETO MAYNAS 32
21 LORETO REQUENA 5
22 MADRE DE DIOS MANU 3
23 MADRE DE DIOS TAHUAMANU 3
24 MADRE DE DIOS TAMBOPATA 32
25 PIURA HUANCABAMBA 5
26 PIURA PAITA 15
27 PIURA PIURA 3
28 PIURA SECHURA 11
29 PIURA SULLANA 8
30 PIURA TALARA 5
31 SAN MARTIN HUALLAGA 3
32 SAN MARTIN MARISCAL CACERES 10
33 SAN MARTIN MOYOBAMBA 6
34 SAN MARTIN SAN MARTIN 6
35 SAN MARTIN TOCACHE 4
36 TUMBES CONTRALMIRANTE VILLAR 5
37 TUMBES TUMBES 12
38 TUMBES ZARUMILLA 5
39 UCAYALI ATALAYA 18
40 UCAYALI CORONEL PORTILLO 17
41 UCAYALI PADRE ABAD 6
In [32]:
alt_worstProv=alt.Chart(worst_ProvWeek_alarma_Frequency)

enc_worstProv=alt_worstProv.encode(
    y='departamento:N',
    x='provincia:N',
    text='weeksAffected:O',
    size='weeksAffected:O'
)

enc_worstProv.mark_text()
Out[32]:

Ahora por departamento

In [33]:
indexList=['semana','departamento','enfermedad_text']
aggregator={'case':['sum']}
ByWeekDepa=dengue.groupby(indexList,observed=True).agg(aggregator)
ByWeekDepa_wide=ByWeekDepa.unstack().fillna(0)
ByWeekDepaAlarm=ByWeekDepa_wide.loc[:,('case','sum','2_ALARMA')]/ByWeekDepa_wide.sum(axis=1)
ByWeekDepaAlarm.name='alarmShare'

ByWeekDepaAlarm=ByWeekDepaAlarm.reset_index()
ByWeekDepaAlarm
Out[33]:
semana departamento alarmShare
0 1 AYACUCHO 0.222222
1 1 CAJAMARCA 0.000000
2 1 CUSCO 0.142857
3 1 HUANUCO 0.000000
4 1 ICA 0.000000
... ... ... ...
752 52 LIMA 1.000000
753 52 LORETO 0.478261
754 52 MADRE DE DIOS 0.375000
755 52 PIURA 0.000000
756 52 UCAYALI 0.115385

757 rows × 3 columns

In [34]:
ByWeekDepaAlarm.describe()
Out[34]:
semana alarmShare
count 757.000000 757.000000
mean 25.394980 0.122590
std 14.666437 0.184344
min 1.000000 0.000000
25% 13.000000 0.000000
50% 24.000000 0.040000
75% 38.000000 0.166667
max 52.000000 1.000000
In [35]:
ByWeekDepaAlarm_focus=ByWeekDepaAlarm[ByWeekDepaAlarm.alarmShare>0]
In [36]:
ByWeekDepaAlarm_focus.describe()
Out[36]:
semana alarmShare
count 454.000000 454.000000
mean 22.991189 0.204407
std 14.144942 0.199880
min 1.000000 0.003367
25% 12.000000 0.058824
50% 21.000000 0.142857
75% 34.000000 0.285714
max 52.000000 1.000000
In [37]:
edges=[-1, .10, .25, .5,1]
theLabels=["a.below10%","b.11-25%","c.26-50%","d.above50%"]
ByWeekDepaAlarm_focus.loc[:,"alarmLevels"]=pd.cut(ByWeekDepaAlarm_focus['alarmShare'],
                                            include_lowest=True,
                                            bins=edges,
                                            labels=theLabels,
                                            ordered=True)

##
ByWeekDepaAlarm_focus.head()
<ipython-input-37-bd957ecc8bae>:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ByWeekDepaAlarm_focus.loc[:,"alarmLevels"]=pd.cut(ByWeekDepaAlarm_focus['alarmShare'],
Out[37]:
semana departamento alarmShare alarmLevels
0 1 AYACUCHO 0.222222 b.11-25%
2 1 CUSCO 0.142857 b.11-25%
6 1 LA LIBERTAD 1.000000 d.above50%
7 1 LORETO 0.153846 b.11-25%
8 1 MADRE DE DIOS 0.250000 b.11-25%
In [38]:
alt_WorstDepa=alt.Chart(ByWeekDepaAlarm_focus).encode(x='semana:O',
                                                      y=alt.Y('departamento:N',
                                                              sort=alt.EncodingSortField(field='alarmShare',op='max',order='descending')))
enc1_WorstDepa=alt_WorstDepa.encode(
    color=alt.Color('alarmLevels:O').scale(scheme="lightgreyred", reverse=False)
)

enc1_WorstDepa.mark_rect()
Out[38]:
In [39]:
enc2_WorstDepa=alt_WorstDepa.encode(
    text=alt.Text('alarmShare:Q', format=".1f"),
    opacity=alt.condition('datum.alarmShare >= 0.3', alt.value(1), alt.value(0)))
enc2_WorstDepa.mark_text(fontStyle='bold')
Out[39]:
In [40]:
enc1_WorstDepa.mark_rect() + enc2_WorstDepa.mark_text()
Out[40]: